Read in data:

# Data:
CC_dat <- read.csv("/Users/kellyloria/Documents/UNR/Randomness\ readings/CausalityClass/CM_data.csv") 

Optional: Create arbitrary variable for temporal auto-correlation in plant size?

# Create lagged variable
CC_dat <- CC_dat %>%
  mutate(lag_plant_size = lag(plant_size, default = NA))

Check out parameter covariance:

# covariance correlations 
chart.Correlation(CC_dat, histogram=TRUE, pch=19)
## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

## Warning in par(usr): argument 1 does not name a graphical parameter

Look at linear regression

Global model then remove a parameter
# saturated model:
hist(CC_dat$herbivory)

glb_mod <- lm(herbivory~scale(fertilizer) + scale(light) + scale(plant_size) + 
                scale(lag_plant_size) + scale(foraging), 
              data=CC_dat)
summary(glb_mod)
## 
## Call:
## lm(formula = herbivory ~ scale(fertilizer) + scale(light) + scale(plant_size) + 
##     scale(lag_plant_size) + scale(foraging), data = CC_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.4817 -0.6490  0.0069  0.6568  3.4223 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           -0.01646    0.03209  -0.513   0.6081    
## scale(fertilizer)      0.03942    0.04029   0.979   0.3280    
## scale(light)           0.26929    0.04574   5.887 5.38e-09 ***
## scale(plant_size)      0.64003    0.04273  14.980  < 2e-16 ***
## scale(lag_plant_size) -0.06740    0.03214  -2.097   0.0362 *  
## scale(foraging)        1.00974    0.03892  25.944  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.014 on 993 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.7105, Adjusted R-squared:  0.709 
## F-statistic: 487.3 on 5 and 993 DF,  p-value: < 2.2e-16
hist(residuals(glb_mod))

vif(glb_mod) # light looks like highest leverage 
##     scale(fertilizer)          scale(light)     scale(plant_size) 
##              1.575339              2.031419              1.771903 
## scale(lag_plant_size)       scale(foraging) 
##              1.001940              1.470318
glb_mod1 <- lm(herbivory~scale(fertilizer) + scale(light) + scale(plant_size) + scale(foraging), 
              data=CC_dat)
summary(glb_mod1)
## 
## Call:
## lm(formula = herbivory ~ scale(fertilizer) + scale(light) + scale(plant_size) + 
##     scale(foraging), data = CC_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5269 -0.6654 -0.0100  0.6762  3.5751 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -0.01714    0.03212  -0.534    0.594    
## scale(fertilizer)  0.03956    0.04031   0.981    0.327    
## scale(light)       0.27078    0.04580   5.912 4.65e-09 ***
## scale(plant_size)  0.63951    0.04277  14.953  < 2e-16 ***
## scale(foraging)    1.00683    0.03894  25.854  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.016 on 995 degrees of freedom
## Multiple R-squared:  0.7093, Adjusted R-squared:  0.7081 
## F-statistic: 606.9 on 4 and 995 DF,  p-value: < 2.2e-16
hist(residuals(glb_mod1))

vif(glb_mod1) # light looks like highest leverage 
## scale(fertilizer)      scale(light) scale(plant_size)   scale(foraging) 
##          1.573452          2.031273          1.770796          1.468256
glb_modi <- lm(herbivory~scale(fertilizer) + scale(light) + scale(plant_size) + scale(foraging) + scale(fertilizer * light), 
              data=CC_dat)
summary(glb_modi)
## 
## Call:
## lm(formula = herbivory ~ scale(fertilizer) + scale(light) + scale(plant_size) + 
##     scale(foraging) + scale(fertilizer * light), data = CC_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5090 -0.6658 -0.0022  0.6780  3.5047 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)               -0.01714    0.03212  -0.534    0.594    
## scale(fertilizer)          0.03986    0.04031   0.989    0.323    
## scale(light)               0.26876    0.04583   5.865 6.11e-09 ***
## scale(plant_size)          0.64079    0.04277  14.982  < 2e-16 ***
## scale(foraging)            1.00722    0.03894  25.869  < 2e-16 ***
## scale(fertilizer * light)  0.03846    0.03216   1.196    0.232    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.016 on 994 degrees of freedom
## Multiple R-squared:  0.7097, Adjusted R-squared:  0.7082 
## F-statistic:   486 on 5 and 994 DF,  p-value: < 2.2e-16
hist(residuals(glb_modi))

vif(glb_modi) 
##         scale(fertilizer)              scale(light)         scale(plant_size) 
##                  1.573513                  2.034016                  1.771900 
##           scale(foraging) scale(fertilizer * light) 
##                  1.468358                  1.001581
AIC(glb_mod, glb_mod1, glb_modi) # ? better with fake lag...
## Warning in AIC.default(glb_mod, glb_mod1, glb_modi): models are not all fitted
## to the same number of observations
##          df      AIC
## glb_mod   7 2871.477
## glb_mod1  6 2876.204
## glb_modi  7 2876.766

Look at Jonathan Lefcheck’s Piecewise structural equation examples:

Interaction rabit hole…

plant_size_modi <- lm(plant_size ~  fertilizer +  light + fertilizer*light,
                     data = CC_dat)
summary(plant_size_mod)
## 
## Call:
## lm(formula = plant_size ~ fertilizer + light, data = CC_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.2049 -0.6430 -0.0068  0.6828  3.3435 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.0007139  0.0307122   0.023    0.981    
## fertilizer  0.4967008  0.0363250  13.674   <2e-16 ***
## light       0.5051542  0.0356793  14.158   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9704 on 997 degrees of freedom
## Multiple R-squared:  0.4349, Adjusted R-squared:  0.4338 
## F-statistic: 383.7 on 2 and 997 DF,  p-value: < 2.2e-16
herb_mod <- lm(herbivory ~ plant_size + light + foraging,
               data = CC_dat)
summary(herb_mod)
## 
## Call:
## lm(formula = herbivory ~ plant_size + light + foraging, data = CC_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.5185 -0.6653  0.0023  0.6761  3.6133 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.004691   0.032127   0.146    0.884    
## plant_size  0.508836   0.030435  16.719  < 2e-16 ***
## light       0.283461   0.045091   6.286 4.85e-10 ***
## foraging    0.828113   0.032026  25.857  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.016 on 996 degrees of freedom
## Multiple R-squared:  0.709,  Adjusted R-squared:  0.7081 
## F-statistic: 808.9 on 3 and 996 DF,  p-value: < 2.2e-16
# Use the `psem` function to create the SEM
CC_psem_model2 <- psem(plant_size_modi, herb_mod)
## Warning: NAs detected in the dataset. Consider removing all rows with NAs to
## prevent fitting to different subsets of data
# Look at object
CC_psem_model2 # Sometimes it works better to specifiy models in line, with called df
## Structural Equations of x :
## lm: plant_size ~ fertilizer + light + fertilizer * light
## lm: herbivory ~ plant_size + light + foraging
## 
## Data:
##   fertilizer      light plant_size   foraging  herbivory lag_plant_size
## 1  0.5208573 -0.3872029 -0.7957658 -0.6570881 -1.6942758             NA
## 2  0.8155222 -0.1671887 -1.4537077  0.4605768 -0.7142346     -0.7957658
## 3  0.8835973 -1.4635119 -0.5343125 -2.0035209 -2.9674024     -1.4537077
## 4 -0.2264909 -1.4187651  0.1868491  0.4587782 -0.1949140     -0.5343125
## 5 -1.4803645 -0.1481271 -0.9388211 -0.1277455 -0.5076546      0.1868491
## 6  0.5107644  1.5497233 -0.3658381  0.9979259  1.2264027     -0.9388211
## ...with  994  more rows
## 
## [1] "class(psem)"
# Plot SEM with standardized coefficients
plot(CC_psem_model2)
# Use `summary` function to get all information at once
summary(CC_psem_model2)
## 
  |                                                                            
  |                                                                      |   0%
  |                                                                            
  |===================================                                   |  50%
  |                                                                            
  |======================================================================| 100%
## 
## Structural Equation Model of CC_psem_model2 
## 
## Call:
##   plant_size ~ fertilizer + light + fertilizer * light
##   herbivory ~ plant_size + light + foraging
## 
##     AIC
##  5659.228
## 
## ---
## Tests of directed separation:
## 
##                 Independ.Claim Test.Type  DF Crit.Value P.Value 
##   herbivory ~ fertilizer + ...      coef 995     0.9813  0.3267 
##    plant_size ~ foraging + ...      coef 995     0.7771  0.4373 
## 
## --
## Global goodness-of-fit:
## 
## Chi-Squared = 1.574 with P-value = 0.455 and on 2 degrees of freedom
## Fisher's C = 3.892 with P-value = 0.421 and on 4 degrees of freedom
## 
## ---
## Coefficients:
## 
##     Response        Predictor Estimate Std.Error  DF Crit.Value P.Value
##   plant_size       fertilizer   0.4962    0.0363 996    13.6551  0.0000
##   plant_size            light   0.5060    0.0357 996    14.1728  0.0000
##   plant_size fertilizer:light  -0.0226    0.0285 996    -0.7945  0.4271
##    herbivory       plant_size   0.5088    0.0304 996    16.7190  0.0000
##    herbivory            light   0.2835    0.0451 996     6.2864  0.0000
##    herbivory         foraging   0.8281    0.0320 996    25.8573  0.0000
##   Std.Estimate    
##         0.3743 ***
##         0.3886 ***
##        -0.0189    
##         0.3490 ***
##         0.1493 ***
##         0.5355 ***
## 
##   Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05
## 
## ---
## Individual R-squared:
## 
##     Response method R.squared
##   plant_size   none      0.44
##    herbivory   none      0.71
# Get P-values
P1 <- summary(plant_size_modi)$coefficients[2, "Pr(>|t|)"]
P2 <- summary(herb_mod)$coefficients[2, "Pr(>|t|)"]

# Construct C-statistic
C <- -2 * (log(P1) + log(P2))
C
## [1] 428.6605
fisherC(CC_psem_model2) 
##   Fisher.C df P.Value
## 1    3.892  4   0.421
AIC(CC_psem_model2, CC_psem_model)
##        AIC  K    n
## 1 5659.228 10 1000
## 2 5657.861  9 1000

Fit Bayesian models

plant_size_b <- bf(plant_size ~  fertilizer +  light)
herb_b <- bf(herbivory ~ plant_size + light + foraging)

CC_fit_brms <- brm(plant_size_b +
                     herb_b + 
                     set_rescor(FALSE), 
                   data = CC_dat,
                   iter = 5000, warmup = 2500,
                   cores = 3, chains = 3)
## Warning: In subset.data.frame(x$frame$re, resp = x$resp) :
##  extra argument 'resp' will be disregarded

## Warning: In subset.data.frame(x$frame$re, resp = x$resp) :
##  extra argument 'resp' will be disregarded
## Compiling Stan program...
## Trying to compile a simple C file
## Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB foo.c
## using C compiler: ‘Apple clang version 15.0.0 (clang-1500.1.0.2.5)’
## using SDK: ‘MacOSX14.2.sdk’
## clang -arch x86_64 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/Rcpp/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/unsupported"  -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/BH/include" -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/StanHeaders/include/src/"  -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/StanHeaders/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppParallel/include/"  -I"/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include '/Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/opt/R/x86_64/include    -fPIC  -falign-functions=64 -Wall -g -O2  -c foo.c -o foo.o
## In file included from <built-in>:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/Eigen/Dense:1:
## In file included from /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/Eigen/Core:19:
## /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/library/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: 'cmath' file not found
## #include <cmath>
##          ^~~~~~~
## 1 error generated.
## make: *** [foo.o] Error 1
## Start sampling
## Warning: In subset.data.frame(x$frame$re, resp = x$resp) :
##  extra argument 'resp' will be disregarded

## Warning: In subset.data.frame(x$frame$re, resp = x$resp) :
##  extra argument 'resp' will be disregarded

Trace plots

plot(CC_fit_brms)

Extract posterior estimates for mean path coefficients

# Extract posterior summaries
posterior_summaries <- posterior_summary(CC_fit_brms)

#  Extract estimates and CIs for paths
estimates <- posterior_summaries[, "Estimate"]
ci_lower <- posterior_summaries[, "Q2.5"]
ci_upper <- posterior_summaries[, "Q97.5"]

# Create path labels
fertilizer_to_plant_size <- paste0("mean = ", round(estimates["b_plantsize_fertilizer"], 2), 
                                   ", CI = [", round(ci_lower["b_plantsize_fertilizer"], 2), 
                                   ", ", round(ci_upper["b_plantsize_fertilizer"], 2), "]")
light_to_plant_size <- paste0("mean = ", round(estimates["b_plantsize_light"], 2), 
                              ", CI = [", round(ci_lower["b_plantsize_light"], 2), 
                              ", ", round(ci_upper["b_plantsize_light"], 2), "]")
plant_size_to_herbivory <- paste0("mean = ", round(estimates["b_herbivory_plant_size"], 2), 
                                  ", CI = [", round(ci_lower["b_herbivory_plant_size"], 2), 
                                  ", ", round(ci_upper["b_herbivory_plant_size"], 2), "]")
light_to_herbivory <- paste0("mean = ", round(estimates["b_herbivory_light"], 2), 
                             ", CI = [", round(ci_lower["b_herbivory_light"], 2), 
                             ", ", round(ci_upper["b_herbivory_light"], 2), "]")
foraging_to_herbivory <- paste0("mean = ", round(estimates["b_herbivory_foraging"], 2), 
                                ", CI = [", round(ci_lower["b_herbivory_foraging"], 2), 
                                ", ", round(ci_upper["b_herbivory_foraging"], 2), "]")

Plot using grViz

# Create the DAG plot
dag_plot <- grViz(paste0("
  digraph DAG {
    graph [layout = dot, rankdir = TB]

    node [shape = box]
    fertilizer -> plant_size [label = '", fertilizer_to_plant_size, "']
    light -> plant_size [label = '", light_to_plant_size, "']
    plant_size -> herbivory [label = '", plant_size_to_herbivory, "']
    light -> herbivory [label = '", light_to_herbivory, "']
    foraging -> herbivory [label = '", foraging_to_herbivory, "']

    plant_size [label = 'Plant Size']
    fertilizer [label = 'Fertilizer']
    light [label = 'Light']
    herbivory [label = 'Herbivory']
    foraging [label = 'Foraging']
  }
"))

dag_plot